{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "\n",
    "import requests\n",
    "\n",
    "from Bio import ExPASy, SwissProt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#explain why not biopython\n",
    "server = 'http://www.uniprot.org/uniprot'\n",
    "def do_request(server, ID='', **kwargs):\n",
    "    params = ''\n",
    "    req = requests.get('%s/%s%s' % (server, ID, params),params=kwargs)\n",
    "    if not req.ok:\n",
    "        req.raise_for_status()\n",
    "    return req"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "req = do_request(server, query='gene:p53 AND reviewed:yes',# AND organism:Human',\n",
    "                 format='tab',\n",
    "                 columns='id,entry name,length,organism,organism-id,database(PDB),database(HGNC)',\n",
    "                 limit='50')\n",
    "#We might revisit this for KEGG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Entry</th>\n",
       "      <th>Entry name</th>\n",
       "      <th>Length</th>\n",
       "      <th>Organism</th>\n",
       "      <th>ID</th>\n",
       "      <th>Cross-reference (PDB)</th>\n",
       "      <th>Cross-reference (HGNC)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>P56423</td>\n",
       "      <td>P53_MACFA</td>\n",
       "      <td>393</td>\n",
       "      <td>Macaca fascicularis (Crab-eating macaque) (Cyn...</td>\n",
       "      <td>9541</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Q9W678</td>\n",
       "      <td>P53_BARBU</td>\n",
       "      <td>369</td>\n",
       "      <td>Barbus barbus (Barbel) (Cyprinus barbus)</td>\n",
       "      <td>40830</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>P10361</td>\n",
       "      <td>P53_RAT</td>\n",
       "      <td>391</td>\n",
       "      <td>Rattus norvegicus (Rat)</td>\n",
       "      <td>10116</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Q9TTA1</td>\n",
       "      <td>P53_TUPBE</td>\n",
       "      <td>393</td>\n",
       "      <td>Tupaia belangeri (Common tree shrew) (Tupaia g...</td>\n",
       "      <td>37347</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>O57538</td>\n",
       "      <td>P53_XIPHE</td>\n",
       "      <td>342</td>\n",
       "      <td>Xiphophorus helleri (Green swordtail)</td>\n",
       "      <td>8084</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>O09185</td>\n",
       "      <td>P53_CRIGR</td>\n",
       "      <td>393</td>\n",
       "      <td>Cricetulus griseus (Chinese hamster) (Cricetul...</td>\n",
       "      <td>10029</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Q9W679</td>\n",
       "      <td>P53_TETMU</td>\n",
       "      <td>367</td>\n",
       "      <td>Tetraodon miurus (Congo puffer)</td>\n",
       "      <td>94908</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Q8SPZ3</td>\n",
       "      <td>P53_DELLE</td>\n",
       "      <td>387</td>\n",
       "      <td>Delphinapterus leucas (Beluga whale)</td>\n",
       "      <td>9749</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Q9TUB2</td>\n",
       "      <td>P53_PIG</td>\n",
       "      <td>386</td>\n",
       "      <td>Sus scrofa (Pig)</td>\n",
       "      <td>9823</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Q29537</td>\n",
       "      <td>P53_CANLF</td>\n",
       "      <td>381</td>\n",
       "      <td>Canis lupus familiaris (Dog) (Canis familiaris)</td>\n",
       "      <td>9615</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>O93379</td>\n",
       "      <td>P53_ICTPU</td>\n",
       "      <td>376</td>\n",
       "      <td>Ictalurus punctatus (Channel catfish) (Silurus...</td>\n",
       "      <td>7998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>P02340</td>\n",
       "      <td>P53_MOUSE</td>\n",
       "      <td>387</td>\n",
       "      <td>Mus musculus (Mouse)</td>\n",
       "      <td>10090</td>\n",
       "      <td>1HU8;2GEQ;2IOI;2IOM;2IOO;2P52;3EXJ;3EXL;</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>P25035</td>\n",
       "      <td>P53_ONCMY</td>\n",
       "      <td>396</td>\n",
       "      <td>Oncorhynchus mykiss (Rainbow trout) (Salmo gai...</td>\n",
       "      <td>8022</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>P79820</td>\n",
       "      <td>P53_ORYLA</td>\n",
       "      <td>352</td>\n",
       "      <td>Oryzias latipes (Japanese rice fish) (Japanese...</td>\n",
       "      <td>8090</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>O12946</td>\n",
       "      <td>P53_PLAFE</td>\n",
       "      <td>366</td>\n",
       "      <td>Platichthys flesus (European flounder) (Pleuro...</td>\n",
       "      <td>8260</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Q92143</td>\n",
       "      <td>P53_XIPMA</td>\n",
       "      <td>342</td>\n",
       "      <td>Xiphophorus maculatus (Southern platyfish) (Pl...</td>\n",
       "      <td>8083</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>P04637</td>\n",
       "      <td>P53_HUMAN</td>\n",
       "      <td>393</td>\n",
       "      <td>Homo sapiens (Human)</td>\n",
       "      <td>9606</td>\n",
       "      <td>1A1U;1AIE;1C26;1DT7;1GZH;1H26;1HS5;1JSP;1KZY;1...</td>\n",
       "      <td>11998;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Q7Z419</td>\n",
       "      <td>R144B_HUMAN</td>\n",
       "      <td>303</td>\n",
       "      <td>Homo sapiens (Human)</td>\n",
       "      <td>9606</td>\n",
       "      <td>NaN</td>\n",
       "      <td>21578;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Q7LG56</td>\n",
       "      <td>RIR2B_HUMAN</td>\n",
       "      <td>351</td>\n",
       "      <td>Homo sapiens (Human)</td>\n",
       "      <td>9606</td>\n",
       "      <td>2VUX;3HF1;4DJN;</td>\n",
       "      <td>17296;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Q42578</td>\n",
       "      <td>PER53_ARATH</td>\n",
       "      <td>335</td>\n",
       "      <td>Arabidopsis thaliana (Mouse-ear cress)</td>\n",
       "      <td>3702</td>\n",
       "      <td>1PA2;1QO4;</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>A7TJT7</td>\n",
       "      <td>SUB22_VANPO</td>\n",
       "      <td>442</td>\n",
       "      <td>Vanderwaltozyma polyspora (strain ATCC 22028 /...</td>\n",
       "      <td>436907</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Q96A56</td>\n",
       "      <td>T53I1_HUMAN</td>\n",
       "      <td>240</td>\n",
       "      <td>Homo sapiens (Human)</td>\n",
       "      <td>9606</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18022;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>Q6PEE3</td>\n",
       "      <td>RIR2B_MOUSE</td>\n",
       "      <td>351</td>\n",
       "      <td>Mus musculus (Mouse)</td>\n",
       "      <td>10090</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>P61260</td>\n",
       "      <td>P53_MACFU</td>\n",
       "      <td>393</td>\n",
       "      <td>Macaca fuscata fuscata (Japanese macaque)</td>\n",
       "      <td>9543</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>P56424</td>\n",
       "      <td>P53_MACMU</td>\n",
       "      <td>393</td>\n",
       "      <td>Macaca mulatta (Rhesus macaque)</td>\n",
       "      <td>9544</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>P79892</td>\n",
       "      <td>P53_HORSE</td>\n",
       "      <td>280</td>\n",
       "      <td>Equus caballus (Horse)</td>\n",
       "      <td>9796</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>A7TJM9</td>\n",
       "      <td>DRS1_VANPO</td>\n",
       "      <td>752</td>\n",
       "      <td>Vanderwaltozyma polyspora (strain ATCC 22028 /...</td>\n",
       "      <td>436907</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Q8IZJ1</td>\n",
       "      <td>UNC5B_HUMAN</td>\n",
       "      <td>945</td>\n",
       "      <td>Homo sapiens (Human)</td>\n",
       "      <td>9606</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12568;</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Entry   Entry name  Length  \\\n",
       "0   P56423    P53_MACFA     393   \n",
       "1   Q9W678    P53_BARBU     369   \n",
       "2   P10361      P53_RAT     391   \n",
       "3   Q9TTA1    P53_TUPBE     393   \n",
       "4   O57538    P53_XIPHE     342   \n",
       "5   O09185    P53_CRIGR     393   \n",
       "6   Q9W679    P53_TETMU     367   \n",
       "7   Q8SPZ3    P53_DELLE     387   \n",
       "8   Q9TUB2      P53_PIG     386   \n",
       "9   Q29537    P53_CANLF     381   \n",
       "10  O93379    P53_ICTPU     376   \n",
       "11  P02340    P53_MOUSE     387   \n",
       "12  P25035    P53_ONCMY     396   \n",
       "13  P79820    P53_ORYLA     352   \n",
       "14  O12946    P53_PLAFE     366   \n",
       "15  Q92143    P53_XIPMA     342   \n",
       "16  P04637    P53_HUMAN     393   \n",
       "17  Q7Z419  R144B_HUMAN     303   \n",
       "18  Q7LG56  RIR2B_HUMAN     351   \n",
       "19  Q42578  PER53_ARATH     335   \n",
       "20  A7TJT7  SUB22_VANPO     442   \n",
       "21  Q96A56  T53I1_HUMAN     240   \n",
       "22  Q6PEE3  RIR2B_MOUSE     351   \n",
       "23  P61260    P53_MACFU     393   \n",
       "24  P56424    P53_MACMU     393   \n",
       "25  P79892    P53_HORSE     280   \n",
       "26  A7TJM9   DRS1_VANPO     752   \n",
       "27  Q8IZJ1  UNC5B_HUMAN     945   \n",
       "\n",
       "                                             Organism      ID  \\\n",
       "0   Macaca fascicularis (Crab-eating macaque) (Cyn...    9541   \n",
       "1            Barbus barbus (Barbel) (Cyprinus barbus)   40830   \n",
       "2                             Rattus norvegicus (Rat)   10116   \n",
       "3   Tupaia belangeri (Common tree shrew) (Tupaia g...   37347   \n",
       "4               Xiphophorus helleri (Green swordtail)    8084   \n",
       "5   Cricetulus griseus (Chinese hamster) (Cricetul...   10029   \n",
       "6                     Tetraodon miurus (Congo puffer)   94908   \n",
       "7                Delphinapterus leucas (Beluga whale)    9749   \n",
       "8                                    Sus scrofa (Pig)    9823   \n",
       "9     Canis lupus familiaris (Dog) (Canis familiaris)    9615   \n",
       "10  Ictalurus punctatus (Channel catfish) (Silurus...    7998   \n",
       "11                               Mus musculus (Mouse)   10090   \n",
       "12  Oncorhynchus mykiss (Rainbow trout) (Salmo gai...    8022   \n",
       "13  Oryzias latipes (Japanese rice fish) (Japanese...    8090   \n",
       "14  Platichthys flesus (European flounder) (Pleuro...    8260   \n",
       "15  Xiphophorus maculatus (Southern platyfish) (Pl...    8083   \n",
       "16                               Homo sapiens (Human)    9606   \n",
       "17                               Homo sapiens (Human)    9606   \n",
       "18                               Homo sapiens (Human)    9606   \n",
       "19             Arabidopsis thaliana (Mouse-ear cress)    3702   \n",
       "20  Vanderwaltozyma polyspora (strain ATCC 22028 /...  436907   \n",
       "21                               Homo sapiens (Human)    9606   \n",
       "22                               Mus musculus (Mouse)   10090   \n",
       "23          Macaca fuscata fuscata (Japanese macaque)    9543   \n",
       "24                    Macaca mulatta (Rhesus macaque)    9544   \n",
       "25                             Equus caballus (Horse)    9796   \n",
       "26  Vanderwaltozyma polyspora (strain ATCC 22028 /...  436907   \n",
       "27                               Homo sapiens (Human)    9606   \n",
       "\n",
       "                                Cross-reference (PDB) Cross-reference (HGNC)  \n",
       "0                                                 NaN                    NaN  \n",
       "1                                                 NaN                    NaN  \n",
       "2                                                 NaN                    NaN  \n",
       "3                                                 NaN                    NaN  \n",
       "4                                                 NaN                    NaN  \n",
       "5                                                 NaN                    NaN  \n",
       "6                                                 NaN                    NaN  \n",
       "7                                                 NaN                    NaN  \n",
       "8                                                 NaN                    NaN  \n",
       "9                                                 NaN                    NaN  \n",
       "10                                                NaN                    NaN  \n",
       "11           1HU8;2GEQ;2IOI;2IOM;2IOO;2P52;3EXJ;3EXL;                    NaN  \n",
       "12                                                NaN                    NaN  \n",
       "13                                                NaN                    NaN  \n",
       "14                                                NaN                    NaN  \n",
       "15                                                NaN                    NaN  \n",
       "16  1A1U;1AIE;1C26;1DT7;1GZH;1H26;1HS5;1JSP;1KZY;1...                 11998;  \n",
       "17                                                NaN                 21578;  \n",
       "18                                    2VUX;3HF1;4DJN;                 17296;  \n",
       "19                                         1PA2;1QO4;                    NaN  \n",
       "20                                                NaN                    NaN  \n",
       "21                                                NaN                 18022;  \n",
       "22                                                NaN                    NaN  \n",
       "23                                                NaN                    NaN  \n",
       "24                                                NaN                    NaN  \n",
       "25                                                NaN                    NaN  \n",
       "26                                                NaN                    NaN  \n",
       "27                                                NaN                 12568;  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#XXX - stringio\n",
    "import pandas as pd\n",
    "import io\n",
    "\n",
    "uniprot_list = pd.read_table(io.StringIO(req.text))\n",
    "uniprot_list.rename(columns={'Organism ID': 'ID'}, inplace=True)\n",
    "uniprot_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "p53_human = uniprot_list[uniprot_list.ID == 9606]['Entry'].tolist()[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "handle = ExPASy.get_sprot_raw(p53_human)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "sp_rec= SwissProt.read(handle)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "P53_HUMAN 393 Name=TP53; Synonyms=P53;\n",
      "RecName: Full=Cellular tumor antigen p53; AltName: Full=Antigen NY-CO-13; AltName: Full=Phosphoprotein p53; AltName: Full=Tumor suppressor p53;\n",
      "Homo sapiens (Human). (393, 43653, 'AD5C149FD8106131')\n",
      "MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD\n"
     ]
    }
   ],
   "source": [
    "print(sp_rec.entry_name, sp_rec.sequence_length, sp_rec.gene_name)\n",
    "print(sp_rec.description)\n",
    "print(sp_rec.organism, sp_rec.seqinfo)\n",
    "print(sp_rec.sequence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['FUNCTION: Acts as a tumor suppressor in many tumor types; induces growth arrest or apoptosis depending on the physiological circumstances and cell type. Involved in cell cycle regulation as a trans-activator that acts to negatively regulate cell division by controlling a set of genes required for this process. One of the activated genes is an inhibitor of cyclin-dependent kinases. Apoptosis induction seems to be mediated either by stimulation of BAX and FAS antigen expression, or by repression of Bcl-2 expression. In cooperation with mitochondrial PPIF is involved in activating oxidative stress-induced necrosis; the function is largely independent of transcription. Induces the transcription of long intergenic non-coding RNA p21 (lincRNA-p21) and lincRNA- Mkln1. LincRNA-p21 participates in TP53-dependent transcriptional repression leading to apoptosis and seem to have to effect on cell-cycle regulation. Implicated in Notch signaling cross-over. Prevents CDK7 kinase activity when associated to CAK complex in response to DNA damage, thus stopping cell cycle progression. Isoform 2 enhances the transactivation activity of isoform 1 from some but not all TP53-inducible promoters. Isoform 4 suppresses transactivation activity and impairs growth suppression mediated by isoform 1. Isoform 7 inhibits isoform 1-mediated apoptosis. Regulates the circadian clock by repressing CLOCK-ARNTL/BMAL1- mediated transcriptional activation of PER2 (PubMed:24051492). {ECO:0000269|PubMed:11025664, ECO:0000269|PubMed:12810724, ECO:0000269|PubMed:15186775, ECO:0000269|PubMed:15340061, ECO:0000269|PubMed:17317671, ECO:0000269|PubMed:17349958, ECO:0000269|PubMed:19556538, ECO:0000269|PubMed:20673990, ECO:0000269|PubMed:20959462, ECO:0000269|PubMed:22726440, ECO:0000269|PubMed:24051492, ECO:0000269|PubMed:9840937}.', 'COFACTOR: Name=Zn(2+); Xref=ChEBI:CHEBI:29105; Note=Binds 1 zinc ion per subunit.;', \"SUBUNIT: Interacts with AXIN1. Probably part of a complex consisting of TP53, HIPK2 and AXIN1 (By similarity). Binds DNA as a homotetramer. Interacts with histone acetyltransferases EP300 and methyltransferases HRMT1L2 and CARM1, and recruits them to promoters. Interacts (via C-terminus) with TAF1; when TAF1 is part of the TFIID complex. Interacts with ING4; this interaction may be indirect. Found in a complex with CABLES1 and TP73. Interacts with HIPK1, HIPK2, and TP53INP1. Interacts with WWOX. May interact with HCV core protein. Interacts with USP7 and SYVN1. Interacts with HSP90AB1. Interacts with CHD8; leading to recruit histone H1 and prevent transactivation activity (By similarity). Interacts with ARMC10, BANP, CDKN2AIP, NUAK1, STK11/LKB1, UHRF2 and E4F1. Interacts with YWHAZ; the interaction enhances TP53 transcriptional activity. Phosphorylation of YWHAZ on 'Ser-58' inhibits this interaction. Interacts (via DNA-binding domain) with MAML1 (via N-terminus). Interacts with MKRN1. Interacts with PML (via C-terminus). Interacts with MDM2; leading to ubiquitination and proteasomal degradation of TP53. Directly interacts with FBXO42; leading to ubiquitination and degradation of TP53. Interacts (phosphorylated at Ser-15 by ATM) with the phosphatase PP2A-PPP2R5C holoenzyme; regulates stress-induced TP53-dependent inhibition of cell proliferation. Interacts with PPP2R2A. Interacts with AURKA, DAXX, BRD7 and TRIM24. Interacts (when monomethylated at Lys-382) with L3MBTL1. Isoform 1 interacts with isoform 2 and with isoform 4. Interacts with GRK5. Binds to the CAK complex (CDK7, cyclin H and MAT1) in response to DNA damage. Interacts with CDK5 in neurons. Interacts with AURKB, SETD2, UHRF2 and NOC2L. Interacts (via N-terminus) with PTK2/FAK1; this promotes ubiquitination by MDM2. Interacts with PTK2B/PYK2; this promotes ubiquitination by MDM2. Interacts with PRKCG. Interacts with PPIF; the association implicates preferentially tetrameric TP53, is induced by oxidative stress and is impaired by cyclosporin A (CsA). Interacts with SNAI1; the interaction induces SNAI1 degradation via MDM2-mediated ubiquitination and inhibits SNAI1-induced cell invasion. Interacts with KAT6A. Interacts with UBC9. Interacts with ZNF385B; the interaction is direct. Interacts (via DNA-binding domain) with ZNF385A; the interaction is direct and enhances p53/TP53 transactivation functions on cell-cycle arrest target genes, resulting in growth arrest. Interacts with ANKRD2. Interacts with RFFL and RNF34; involved in p53/TP53 ubiquitination. Interacts with MTA1 and COP1. Interacts with CCAR2 (via N-terminus). Interacts with MORC3 (PubMed:17332504). Interacts (via C-terminus) with POU4F2 isoform 1 (via C-terminus) (PubMed:17145718). Interacts (via oligomerization region) with NOP53; the interaction is direct and may prevent the MDM2-mediated proteasomal degradation of TP53 (PubMed:22522597). Interacts with AFG1L; mediates mitochondrial translocation of TP53 (PubMed:27323408). Interacts with UBD (PubMed:25422469). {ECO:0000250, ECO:0000269|PubMed:12750254, ECO:0000269|PubMed:12810724, ECO:0000269|PubMed:12851404, ECO:0000269|PubMed:14534297, ECO:0000269|PubMed:14702041, ECO:0000269|PubMed:15053879, ECO:0000269|PubMed:15109303, ECO:0000269|PubMed:15136035, ECO:0000269|PubMed:15186775, ECO:0000269|PubMed:15701641, ECO:0000269|PubMed:15855171, ECO:0000269|PubMed:16219768, ECO:0000269|PubMed:16322561, ECO:0000269|PubMed:16376338, ECO:0000269|PubMed:16377624, ECO:0000269|PubMed:16402859, ECO:0000269|PubMed:16474402, ECO:0000269|PubMed:16793544, ECO:0000269|PubMed:16845383, ECO:0000269|PubMed:17015838, ECO:0000269|PubMed:17108107, ECO:0000269|PubMed:17121812, ECO:0000269|PubMed:17145718, ECO:0000269|PubMed:17170702, ECO:0000269|PubMed:17245430, ECO:0000269|PubMed:17317671, ECO:0000269|PubMed:17332504, ECO:0000269|PubMed:17591690, ECO:0000269|PubMed:17719541, ECO:0000269|PubMed:17904127, ECO:0000269|PubMed:17967874, ECO:0000269|PubMed:18585004, ECO:0000269|PubMed:18650397, ECO:0000269|PubMed:18996393, ECO:0000269|PubMed:19509332, ECO:0000269|PubMed:19515728, ECO:0000269|PubMed:19536131, ECO:0000269|PubMed:19556538, ECO:0000269|PubMed:19837670, ECO:0000269|PubMed:19880522, ECO:0000269|PubMed:20124405, ECO:0000269|PubMed:20142040, ECO:0000269|PubMed:20228809, ECO:0000269|PubMed:20364130, ECO:0000269|PubMed:20385133, ECO:0000269|PubMed:20660729, ECO:0000269|PubMed:20870725, ECO:0000269|PubMed:20959462, ECO:0000269|PubMed:21317932, ECO:0000269|PubMed:21952639, ECO:0000269|PubMed:22214662, ECO:0000269|PubMed:22522597, ECO:0000269|PubMed:22726440, ECO:0000269|PubMed:22945289, ECO:0000269|PubMed:23431171, ECO:0000269|PubMed:25422469, ECO:0000269|PubMed:25732823, ECO:0000269|PubMed:27323408, ECO:0000269|PubMed:8875926, ECO:0000269|PubMed:8875929, ECO:0000269|PubMed:9840937}.\", 'SUBUNIT: (Microbial infection) Interacts with cancer- associated/HPV E6 viral proteins leading to ubiquitination and degradation of TP53 giving a possible model for cell growth regulation. This complex formation requires an additional factor, E6-AP, which stably associates with TP53 in the presence of E6. {ECO:0000269|PubMed:2175676}.', 'SUBUNIT: (Microbial infection) Interacts with human cytomegalovirus/HHV-5 protein UL123. {ECO:0000269|PubMed:19776115}.', 'SUBUNIT: (Microbial infection) Interacts (via N-terminus) with human adenovirus 5 E1B-55K protein; this interaction leads to the inhibition of TP53 function and/or its degradation. {ECO:0000269|PubMed:25772236}.', 'INTERACTION: Self; NbExp=24; IntAct=EBI-366083, EBI-366083; P03070:- (xeno); NbExp=23; IntAct=EBI-366083, EBI-617698; P26663:- (xeno); NbExp=9; IntAct=EBI-366083, EBI-6838571; Q7L7W2:-; NbExp=2; IntAct=EBI-366083, EBI-7210801; Q8QW27:- (xeno); NbExp=2; IntAct=EBI-366083, EBI-6863726; Q13155:AIMP2; NbExp=6; IntAct=EBI-366083, EBI-745226; O95376:ARIH2; NbExp=5; IntAct=EBI-366083, EBI-711158; P49407:ARRB1; NbExp=5; IntAct=EBI-366083, EBI-743313; P29066:Arrb1 (xeno); NbExp=3; IntAct=EBI-366083, EBI-4303019; Q9UBL3:ASH2L; NbExp=7; IntAct=EBI-366083, EBI-540797; O95352-2:ATG7; NbExp=4; IntAct=EBI-366083, EBI-15980880; O15169:AXIN1; NbExp=4; IntAct=EBI-366083, EBI-710484; Q8N9N5:BANP; NbExp=3; IntAct=EBI-366083, EBI-744695; P10415:BCL2; NbExp=5; IntAct=EBI-366083, EBI-77694; Q07817-1:BCL2L1; NbExp=26; IntAct=EBI-366083, EBI-287195; P11274:BCR; NbExp=2; IntAct=EBI-366083, EBI-712838; O14503:BHLHE40; NbExp=11; IntAct=EBI-366083, EBI-711810; P38398:BRCA1; NbExp=2; IntAct=EBI-366083, EBI-349905; P51587:BRCA2; NbExp=7; IntAct=EBI-366083, EBI-79792; Q9NPI1:BRD7; NbExp=9; IntAct=EBI-366083, EBI-711221; Q9BX70:BTBD2; NbExp=2; IntAct=EBI-366083, EBI-710091; Q9Y297:BTRC; NbExp=2; IntAct=EBI-366083, EBI-307461; Q9ESJ1:Cables1 (xeno); NbExp=3; IntAct=EBI-366083, EBI-604411; Q9BWC9:CCDC106; NbExp=3; IntAct=EBI-366083, EBI-711501; P38936:CDKN1A; NbExp=3; IntAct=EBI-366083, EBI-375077; P17676:CEBPB; NbExp=4; IntAct=EBI-366083, EBI-969696; Q03701:CEBPZ; NbExp=2; IntAct=EBI-366083, EBI-1046778; O96017:CHEK2; NbExp=3; IntAct=EBI-366083, EBI-1180783; Q8NHY2:COP1; NbExp=5; IntAct=EBI-366083, EBI-1176214; Q92793:CREBBP; NbExp=11; IntAct=EBI-366083, EBI-81215; P45481:Crebbp (xeno); NbExp=9; IntAct=EBI-366083, EBI-296306; P55060:CSE1L; NbExp=5; IntAct=EBI-366083, EBI-286709; P68400:CSNK2A1; NbExp=2; IntAct=EBI-366083, EBI-347804; Q14999:CUL7; NbExp=5; IntAct=EBI-366083, EBI-308606; Q8IWT3:CUL9; NbExp=4; IntAct=EBI-366083, EBI-311123; Q9P0U4:CXXC1; NbExp=7; IntAct=EBI-366083, EBI-949911; Q9UER7:DAXX; NbExp=12; IntAct=EBI-366083, EBI-77321; Q92841:DDX17; NbExp=3; IntAct=EBI-366083, EBI-746012; P17844:DDX5; NbExp=6; IntAct=EBI-366083, EBI-351962; Q9NRR4:DROSHA; NbExp=5; IntAct=EBI-366083, EBI-528367; Q9BV47:DUSP26; NbExp=9; IntAct=EBI-366083, EBI-2924519; O14641:DVL2; NbExp=4; IntAct=EBI-366083, EBI-740850; P03120:E2 (xeno); NbExp=3; IntAct=EBI-366083, EBI-1779322; P03126:E6 (xeno); NbExp=5; IntAct=EBI-366083, EBI-1177242; P04019:E6 (xeno); NbExp=2; IntAct=EBI-366083, EBI-1177232; P06463:E6 (xeno); NbExp=5; IntAct=EBI-366083, EBI-1186926; Q09472:EP300; NbExp=21; IntAct=EBI-366083, EBI-447295; P15036:ETS2; NbExp=4; IntAct=EBI-366083, EBI-1646991; Q86XK2:FBXO11; NbExp=4; IntAct=EBI-366083, EBI-1047804; O43524:FOXO3; NbExp=2; IntAct=EBI-366083, EBI-1644164; P49841:GSK3B; NbExp=3; IntAct=EBI-366083, EBI-373586; P32780:GTF2H1; NbExp=5; IntAct=EBI-366083, EBI-715539; Q13547:HDAC1; NbExp=7; IntAct=EBI-366083, EBI-301834; Q86Z02:HIPK1; NbExp=2; IntAct=EBI-366083, EBI-692891; Q9H2X6:HIPK2; NbExp=3; IntAct=EBI-366083, EBI-348345; P09429:HMGB1; NbExp=9; IntAct=EBI-366083, EBI-389432; P61978:HNRNPK; NbExp=2; IntAct=EBI-366083, EBI-304185; P61978-2:HNRNPK; NbExp=2; IntAct=EBI-366083, EBI-7060731; Q9BUJ2:HNRNPUL1; NbExp=12; IntAct=EBI-366083, EBI-1018153; P02829:HSP82 (xeno); NbExp=8; IntAct=EBI-366083, EBI-8659; P34931:HSPA1L; NbExp=2; IntAct=EBI-366083, EBI-354912; P38646:HSPA9; NbExp=6; IntAct=EBI-366083, EBI-354932; P04792:HSPB1; NbExp=3; IntAct=EBI-366083, EBI-352682; P42858:HTT; NbExp=4; IntAct=EBI-366083, EBI-466029; Q7Z6Z7:HUWE1; NbExp=3; IntAct=EBI-366083, EBI-625934; Q16666-2:IFI16; NbExp=6; IntAct=EBI-366083, EBI-6273540; Q08619:Ifi205b (xeno); NbExp=2; IntAct=EBI-366083, EBI-8064290; O14920:IKBKB; NbExp=2; IntAct=EBI-366083, EBI-81266; Q9UHH9:IP6K2; NbExp=4; IntAct=EBI-366083, EBI-747509; Q6NYC1:JMJD6; NbExp=7; IntAct=EBI-366083, EBI-8464037; Q92993:KAT5; NbExp=3; IntAct=EBI-366083, EBI-399080; Q9H7Z6:KAT8; NbExp=2; IntAct=EBI-366083, EBI-896414; O60341-1:KDM1A; NbExp=6; IntAct=EBI-366083, EBI-15599570; Q8IZD2:KMT2E; NbExp=4; IntAct=EBI-366083, EBI-2689959; Q16363:LAMA4; NbExp=2; IntAct=EBI-366083, EBI-711505; P43356:MAGEA2B; NbExp=7; IntAct=EBI-366083, EBI-5650739; Q9UBF1:MAGEC2; NbExp=3; IntAct=EBI-366083, EBI-5651487; P46821:MAP1B; NbExp=6; IntAct=EBI-366083, EBI-764611; Q15759:MAPK11; NbExp=2; IntAct=EBI-366083, EBI-298304; Q8IW41:MAPKAPK5; NbExp=2; IntAct=EBI-366083, EBI-1201460; Q00987:MDM2; NbExp=96; IntAct=EBI-366083, EBI-389668; O15151:MDM4; NbExp=19; IntAct=EBI-366083, EBI-398437; Q15648:MED1; NbExp=3; IntAct=EBI-366083, EBI-394459; Q9UHC7:MKRN1; NbExp=8; IntAct=EBI-366083, EBI-373524; O75970:MPDZ; NbExp=3; IntAct=EBI-366083, EBI-821405; P04731:MT1A; NbExp=3; IntAct=EBI-366083, EBI-8045030; P55209:NAP1L1; NbExp=2; IntAct=EBI-3895849, EBI-356392; P19338:NCL; NbExp=2; IntAct=EBI-366083, EBI-346967; Q9Y618:NCOR2; NbExp=7; IntAct=EBI-366083, EBI-80830; P23511:NFYA; NbExp=11; IntAct=EBI-366083, EBI-389739; P25208:NFYB; NbExp=6; IntAct=EBI-366083, EBI-389728; Q9Y3T9:NOC2L; NbExp=8; IntAct=EBI-366083, EBI-751547; O60936:NOL3; NbExp=3; IntAct=EBI-366083, EBI-740992; P06748:NPM1; NbExp=6; IntAct=EBI-366083, EBI-78579; P06748-1:NPM1; NbExp=3; IntAct=EBI-366083, EBI-354150; Q15466:NR0B2; NbExp=3; IntAct=EBI-366083, EBI-3910729; P22736:NR4A1; NbExp=6; IntAct=EBI-366083, EBI-721550; O43847:NRDC; NbExp=6; IntAct=EBI-366083, EBI-2371631; P89055:NSP1 (xeno); NbExp=6; IntAct=EBI-366083, EBI-9522973; O60285:NUAK1; NbExp=5; IntAct=EBI-366083, EBI-1046789; P49757:NUMB; NbExp=5; IntAct=EBI-366083, EBI-915016; Q96FW1:OTUB1; NbExp=8; IntAct=EBI-366083, EBI-1058491; Q8TEW0:PARD3; NbExp=3; IntAct=EBI-366083, EBI-81968; P09874:PARP1; NbExp=5; IntAct=EBI-366083, EBI-355676; Q96KB5:PBK; NbExp=7; IntAct=EBI-366083, EBI-536853; P35232:PHB; NbExp=6; IntAct=EBI-366083, EBI-354213; O75925:PIAS1; NbExp=4; IntAct=EBI-366083, EBI-629434; O75928:PIAS2; NbExp=2; IntAct=EBI-366083, EBI-348555; Q8N2W9:PIAS4; NbExp=2; IntAct=EBI-366083, EBI-473160; Q13526:PIN1; NbExp=14; IntAct=EBI-366083, EBI-714158; P53350:PLK1; NbExp=6; IntAct=EBI-366083, EBI-476768; P29590:PML; NbExp=4; IntAct=EBI-366083, EBI-295890; P30405:PPIF; NbExp=4; IntAct=EBI-366083, EBI-5544229; P36873-1:PPP1CC; NbExp=2; IntAct=EBI-366083, EBI-356289; Q8WUF5:PPP1R13L; NbExp=12; IntAct=EBI-366083, EBI-5550163; P30153:PPP2R1A; NbExp=3; IntAct=EBI-366083, EBI-302388; Q13362:PPP2R5C; NbExp=4; IntAct=EBI-366083, EBI-1266156; Q05655:PRKCD; NbExp=4; IntAct=EBI-366083, EBI-704279; P61289:PSME3; NbExp=7; IntAct=EBI-366083, EBI-355546; Q05397:PTK2; NbExp=13; IntAct=EBI-366083, EBI-702142; Q06609:RAD51; NbExp=2; IntAct=EBI-366083, EBI-297202; Q06330:RBPJ; NbExp=5; IntAct=EBI-366083, EBI-632552; Q96PM5:RCHY1; NbExp=11; IntAct=EBI-366083, EBI-947779; Q6PCD5:RFWD3; NbExp=5; IntAct=EBI-366083, EBI-2129159; Q06587:RING1; NbExp=7; IntAct=EBI-366083, EBI-752313; P23396:RPS3; NbExp=4; IntAct=EBI-366083, EBI-351193; Q8N488:RYBP; NbExp=3; IntAct=EBI-366083, EBI-752324; P23297:S100A1; NbExp=2; IntAct=EBI-366083, EBI-743686; P29034:S100A2; NbExp=2; IntAct=EBI-366083, EBI-752230; P26447:S100A4; NbExp=7; IntAct=EBI-366083, EBI-717058; P04271:S100B; NbExp=2; IntAct=EBI-366083, EBI-458391; Q15424:SAFB; NbExp=5; IntAct=EBI-366083, EBI-348298; Q8WTS6:SETD7; NbExp=11; IntAct=EBI-366083, EBI-1268586; P31947:SFN; NbExp=4; IntAct=EBI-366083, EBI-476295; Q96ST3:SIN3A; NbExp=2; IntAct=EBI-366083, EBI-347218; Q96EB6:SIRT1; NbExp=18; IntAct=EBI-366083, EBI-1802965; Q923E4:Sirt1 (xeno); NbExp=4; IntAct=EBI-366083, EBI-1802585; Q15796:SMAD2; NbExp=7; IntAct=EBI-366083, EBI-1040141; Q9NRG4:SMYD2; NbExp=6; IntAct=EBI-366083, EBI-1055671; Q8R5A0:Smyd2 (xeno); NbExp=3; IntAct=EBI-366083, EBI-15612527; O95863:SNAI1; NbExp=2; IntAct=EBI-366083, EBI-1045459; Q06945:SOX4; NbExp=4; IntAct=EBI-366083, EBI-6672525; P08047:SP1; NbExp=6; IntAct=EBI-366083, EBI-298336; Q12772:SREBF2; NbExp=3; IntAct=EBI-366083, EBI-465059; Q96SB4:SRPK1; NbExp=3; IntAct=EBI-366083, EBI-539478; P63165:SUMO1; NbExp=3; IntAct=EBI-366083, EBI-80140; Q86TM6:SYVN1; NbExp=5; IntAct=EBI-366083, EBI-947849; P20226:TBP; NbExp=2; IntAct=EBI-366083, EBI-355371; P15884:TCF4; NbExp=2; IntAct=EBI-366083, EBI-533224; Q96GM8:TOE1; NbExp=3; IntAct=EBI-366083, EBI-717460; Q12888:TP53BP1; NbExp=6; IntAct=EBI-366083, EBI-396540; Q12888-1:TP53BP1; NbExp=17; IntAct=EBI-366083, EBI-8022649; Q13625:TP53BP2; NbExp=7; IntAct=EBI-366083, EBI-77642; Q9H3D4:TP63; NbExp=5; IntAct=EBI-366083, EBI-2337775; O88898:Tp63 (xeno); NbExp=2; IntAct=EBI-366083, EBI-2338025; P13693:TPT1; NbExp=7; IntAct=EBI-366083, EBI-1783169; O15164:TRIM24; NbExp=3; IntAct=EBI-366083, EBI-2130378; Q15672:TWIST1; NbExp=10; IntAct=EBI-366083, EBI-1797287; P26687:Twist1 (xeno); NbExp=4; IntAct=EBI-366083, EBI-6123119; P0CG48:UBC; NbExp=15; IntAct=EBI-366083, EBI-3390054; P63279:UBE2I; NbExp=3; IntAct=EBI-366083, EBI-80168; Q05086:UBE3A; NbExp=6; IntAct=EBI-366083, EBI-954357; Q96PU4:UHRF2; NbExp=3; IntAct=EBI-366083, EBI-625304; Q9H9J4:USP42; NbExp=2; IntAct=EBI-366083, EBI-2513638; Q9H9J4-2:USP42; NbExp=2; IntAct=EBI-366083, EBI-9118105; Q93009:USP7; NbExp=22; IntAct=EBI-366083, EBI-302474; P11473:VDR; NbExp=6; IntAct=EBI-366083, EBI-286357; Q99986:VRK1; NbExp=11; IntAct=EBI-366083, EBI-1769146; Q14191:WRN; NbExp=5; IntAct=EBI-366083, EBI-368417; Q9NZC7:WWOX; NbExp=4; IntAct=EBI-366083, EBI-4320739; O14980:XPO1; NbExp=3; IntAct=EBI-366083, EBI-355867; P12956:XRCC6; NbExp=2; IntAct=EBI-366083, EBI-353208; P61981:YWHAG; NbExp=5; IntAct=EBI-366083, EBI-359832; P63104:YWHAZ; NbExp=2; IntAct=EBI-366083, EBI-347088; Q8TAQ5:ZNF420; NbExp=4; IntAct=EBI-366083, EBI-3923307; Q9PST7:znf585b (xeno); NbExp=3; IntAct=EBI-366083, EBI-1782562;', 'SUBCELLULAR LOCATION: Cytoplasm. Nucleus. Nucleus, PML body. Endoplasmic reticulum. Mitochondrion matrix. Note=Interaction with BANP promotes nuclear localization. Recruited into PML bodies together with CHEK2. Translocates to mitochondria upon oxidative stress. Translocates to mitochondria in response to mitomycin C treatment (PubMed:27323408). {ECO:0000269|PubMed:27323408}.', 'SUBCELLULAR LOCATION: Isoform 1: Nucleus. Cytoplasm. Note=Predominantly nuclear but localizes to the cytoplasm when expressed with isoform 4.', 'SUBCELLULAR LOCATION: Isoform 2: Nucleus. Cytoplasm. Note=Localized mainly in the nucleus with minor staining in the cytoplasm.', 'SUBCELLULAR LOCATION: Isoform 3: Nucleus. Cytoplasm. Note=Localized in the nucleus in most cells but found in the cytoplasm in some cells.', 'SUBCELLULAR LOCATION: Isoform 4: Nucleus. Cytoplasm. Note=Predominantly nuclear but translocates to the cytoplasm following cell stress.', 'SUBCELLULAR LOCATION: Isoform 7: Nucleus. Cytoplasm. Note=Localized mainly in the nucleus with minor staining in the cytoplasm.', 'SUBCELLULAR LOCATION: Isoform 8: Nucleus. Cytoplasm. Note=Localized in both nucleus and cytoplasm in most cells. In some cells, forms foci in the nucleus that are different from nucleoli.', 'SUBCELLULAR LOCATION: Isoform 9: Cytoplasm.', 'ALTERNATIVE PRODUCTS: Event=Alternative promoter usage, Alternative splicing; Named isoforms=9; Name=1; Synonyms=p53, p53alpha;   IsoId=P04637-1; Sequence=Displayed; Name=2; Synonyms=I9RET, p53beta;   IsoId=P04637-2; Sequence=VSP_006535, VSP_006536;   Note=Expressed in quiescent lymphocytes. Seems to be   non-functional. May be produced at very low levels due to a   premature stop codon in the mRNA, leading to nonsense-mediated   mRNA decay.; Name=3; Synonyms=p53gamma;   IsoId=P04637-3; Sequence=VSP_040560, VSP_040561;   Note=Expressed in quiescent lymphocytes. Seems to be   non-functional. May be produced at very low levels due to a   premature stop codon in the mRNA, leading to nonsense-mediated   mRNA decay.; Name=4; Synonyms=Del40-p53, Del40-p53alpha, p47;   IsoId=P04637-4; Sequence=VSP_040832; Name=5; Synonyms=Del40-p53beta;   IsoId=P04637-5; Sequence=VSP_040832, VSP_006535, VSP_006536; Name=6; Synonyms=Del40-p53gamma;   IsoId=P04637-6; Sequence=VSP_040832, VSP_040560, VSP_040561; Name=7; Synonyms=Del133-p53, Del133-p53alpha;   IsoId=P04637-7; Sequence=VSP_040833;   Note=Produced by alternative promoter usage.; Name=8; Synonyms=Del133-p53beta;   IsoId=P04637-8; Sequence=VSP_040833, VSP_006535, VSP_006536;   Note=Produced by alternative promoter usage and alternative   splicing.; Name=9; Synonyms=Del133-p53gamma;   IsoId=P04637-9; Sequence=VSP_040833, VSP_040560, VSP_040561;   Note=Produced by alternative promoter usage and alternative   splicing.;', 'TISSUE SPECIFICITY: Ubiquitous. Isoforms are expressed in a wide range of normal tissues but in a tissue-dependent manner. Isoform 2 is expressed in most normal tissues but is not detected in brain, lung, prostate, muscle, fetal brain, spinal cord and fetal liver. Isoform 3 is expressed in most normal tissues but is not detected in lung, spleen, testis, fetal brain, spinal cord and fetal liver. Isoform 7 is expressed in most normal tissues but is not detected in prostate, uterus, skeletal muscle and breast. Isoform 8 is detected only in colon, bone marrow, testis, fetal brain and intestine. Isoform 9 is expressed in most normal tissues but is not detected in brain, heart, lung, fetal liver, salivary gland, breast or intestine. {ECO:0000269|PubMed:16131611}.', 'INDUCTION: Up-regulated in response to DNA damage. Isoform 2 is not induced in tumor cells in response to stress. {ECO:0000269|PubMed:10570149, ECO:0000269|PubMed:16131611}.', 'DOMAIN: The nuclear export signal acts as a transcriptional repression domain. The TADI and TADII motifs (residues 17 to 25 and 48 to 56) correspond both to 9aaTAD motifs which are transactivation domains present in a large number of yeast and animal transcription factors. {ECO:0000269|PubMed:17467953}.', 'PTM: Acetylated. Acetylation of Lys-382 by CREBBP enhances transcriptional activity. Deacetylation of Lys-382 by SIRT1 impairs its ability to induce proapoptotic program and modulate cell senescence. Deacetylation by SIRT2 impairs its ability to induce transcription activation in a AKT-dependent manner. {ECO:0000269|PubMed:10656795, ECO:0000269|PubMed:20228809, ECO:0000269|PubMed:23431171}.', 'PTM: Phosphorylation on Ser residues mediates transcriptional activation. Phosphorylated by HIPK1 (By similarity). Phosphorylation at Ser-9 by HIPK4 increases repression activity on BIRC5 promoter. Phosphorylated on Thr-18 by VRK1. Phosphorylated on Ser-20 by CHEK2 in response to DNA damage, which prevents ubiquitination by MDM2. Phosphorylated on Ser-20 by PLK3 in response to reactive oxygen species (ROS), promoting p53/TP53- mediated apoptosis. Phosphorylated on Thr-55 by TAF1, which promotes MDM2-mediated degradation. Phosphorylated on Ser-33 by CDK7 in a CAK complex in response to DNA damage. Phosphorylated on Ser-46 by HIPK2 upon UV irradiation. Phosphorylation on Ser-46 is required for acetylation by CREBBP. Phosphorylated on Ser-392 following UV but not gamma irradiation. Phosphorylated on Ser-15 upon ultraviolet irradiation; which is enhanced by interaction with BANP. Phosphorylated by NUAK1 at Ser-15 and Ser-392; was initially thought to be mediated by STK11/LKB1 but it was later shown that it is indirect and that STK11/LKB1-dependent phosphorylation is probably mediated by downstream NUAK1 (PubMed:21317932). It is unclear whether AMP directly mediates phosphorylation at Ser-15. Phosphorylated on Thr-18 by isoform 1 and isoform 2 of VRK2. Phosphorylation on Thr-18 by isoform 2 of VRK2 results in a reduction in ubiquitination by MDM2 and an increase in acetylation by EP300. Stabilized by CDK5-mediated phosphorylation in response to genotoxic and oxidative stresses at Ser-15, Ser-33 and Ser-46, leading to accumulation of p53/TP53, particularly in the nucleus, thus inducing the transactivation of p53/TP53 target genes. Phosphorylated by DYRK2 at Ser-46 in response to genotoxic stress. Phosphorylated at Ser-315 and Ser- 392 by CDK2 in response to DNA-damage. {ECO:0000250, ECO:0000269|PubMed:10570149, ECO:0000269|PubMed:10606744, ECO:0000269|PubMed:10884347, ECO:0000269|PubMed:10951572, ECO:0000269|PubMed:11239457, ECO:0000269|PubMed:11447225, ECO:0000269|PubMed:11546806, ECO:0000269|PubMed:11551930, ECO:0000269|PubMed:11554766, ECO:0000269|PubMed:11740489, ECO:0000269|PubMed:11780126, ECO:0000269|PubMed:12810724, ECO:0000269|PubMed:14702041, ECO:0000269|PubMed:15053879, ECO:0000269|PubMed:15701641, ECO:0000269|PubMed:15866171, ECO:0000269|PubMed:16377624, ECO:0000269|PubMed:16704422, ECO:0000269|PubMed:1705009, ECO:0000269|PubMed:17108107, ECO:0000269|PubMed:17254968, ECO:0000269|PubMed:17349958, ECO:0000269|PubMed:17591690, ECO:0000269|PubMed:17967874, ECO:0000269|PubMed:18022393, ECO:0000269|PubMed:20041275, ECO:0000269|PubMed:20124405, ECO:0000269|PubMed:20959462, ECO:0000269|PubMed:21317932, ECO:0000269|PubMed:2141171, ECO:0000269|PubMed:22214662, ECO:0000269|PubMed:9372954}.', 'PTM: Dephosphorylated by PP2A-PPP2R5C holoenzyme at Thr-55. SV40 small T antigen inhibits the dephosphorylation by the AC form of PP2A.', 'PTM: May be O-glycosylated in the C-terminal basic region. Studied in EB-1 cell line. {ECO:0000269|PubMed:8632915}.', 'PTM: Ubiquitinated by MDM2 and SYVN1, which leads to proteasomal degradation (PubMed:10722742, PubMed:12810724, PubMed:15340061, PubMed:17170702, PubMed:19880522). Ubiquitinated by RFWD3, which works in cooperation with MDM2 and may catalyze the formation of short polyubiquitin chains on p53/TP53 that are not targeted to the proteasome (PubMed:10722742, PubMed:12810724, PubMed:20173098). Ubiquitinated by MKRN1 at Lys-291 and Lys-292, which leads to proteasomal degradation (PubMed:19536131). Deubiquitinated by USP10, leading to its stabilization (PubMed:20096447). Ubiquitinated by TRIM24, RFFL, RNF34 and RNF125, which leads to proteasomal degradation (PubMed:19556538). Ubiquitination by TOPORS induces degradation (PubMed:19473992). Deubiquitination by USP7, leading to stabilization (PubMed:15053880). Isoform 4 is monoubiquitinated in an MDM2- independent manner (PubMed:15340061). Ubiquitinated by COP1, which leads to proteasomal degradation (PubMed:19837670). Ubiquitination and subsequent proteasomal degradation is negatively regulated by CCAR2 (PubMed:25732823). {ECO:0000269|PubMed:10722742, ECO:0000269|PubMed:12810724, ECO:0000269|PubMed:15053880, ECO:0000269|PubMed:15340061, ECO:0000269|PubMed:17170702, ECO:0000269|PubMed:18206965, ECO:0000269|PubMed:19473992, ECO:0000269|PubMed:19536131, ECO:0000269|PubMed:19556538, ECO:0000269|PubMed:19837670, ECO:0000269|PubMed:19880522, ECO:0000269|PubMed:20096447, ECO:0000269|PubMed:20173098, ECO:0000269|PubMed:25591766, ECO:0000269|PubMed:25732823}.', 'PTM: Monomethylated at Lys-372 by SETD7, leading to stabilization and increased transcriptional activation. Monomethylated at Lys- 370 by SMYD2, leading to decreased DNA-binding activity and subsequent transcriptional regulation activity. Lys-372 monomethylation prevents interaction with SMYD2 and subsequent monomethylation at Lys-370. Dimethylated at Lys-373 by EHMT1 and EHMT2. Monomethylated at Lys-382 by KMT5A, promoting interaction with L3MBTL1 and leading to repress transcriptional activity. Dimethylation at Lys-370 and Lys-382 diminishes p53 ubiquitination, through stabilizing association with the methyl reader PHF20. Demethylation of dimethylated Lys-370 by KDM1A prevents interaction with TP53BP1 and represses TP53-mediated transcriptional activation.', 'PTM: Sumoylated with SUMO1. Sumoylated at Lys-386 by UBC9. {ECO:0000269|PubMed:11124955, ECO:0000269|PubMed:22214662, ECO:0000269|Ref.36}.', 'DISEASE: Note=TP53 is found in increased amounts in a wide variety of transformed cells. TP53 is frequently mutated or inactivated in about 60% of cancers. TP53 defects are found in Barrett metaplasia a condition in which the normally stratified squamous epithelium of the lower esophagus is replaced by a metaplastic columnar epithelium. The condition develops as a complication in approximately 10% of patients with chronic gastroesophageal reflux disease and predisposes to the development of esophageal adenocarcinoma.', 'DISEASE: Esophageal cancer (ESCR) [MIM:133239]: A malignancy of the esophagus. The most common types are esophageal squamous cell carcinoma and adenocarcinoma. Cancer of the esophagus remains a devastating disease because it is usually not detected until it has progressed to an advanced incurable stage. Note=The disease is caused by mutations affecting the gene represented in this entry.', 'DISEASE: Li-Fraumeni syndrome (LFS) [MIM:151623]: Autosomal dominant familial cancer syndrome that in its classic form is defined by the existence of a proband affected by a sarcoma before 45 years with a first degree relative affected by any tumor before 45 years and another first degree relative with any tumor before 45 years or a sarcoma at any age. Other clinical definitions for LFS have been proposed (PubMed:8118819 and PubMed:8718514) and called Li-Fraumeni like syndrome (LFL). In these families affected relatives develop a diverse set of malignancies at unusually early ages. Four types of cancers account for 80% of tumors occurring in TP53 germline mutation carriers: breast cancers, soft tissue and bone sarcomas, brain tumors (astrocytomas) and adrenocortical carcinomas. Less frequent tumors include choroid plexus carcinoma or papilloma before the age of 15, rhabdomyosarcoma before the age of 5, leukemia, Wilms tumor, malignant phyllodes tumor, colorectal and gastric cancers. {ECO:0000269|PubMed:10484981, ECO:0000269|PubMed:1565144, ECO:0000269|PubMed:1737852, ECO:0000269|PubMed:1933902, ECO:0000269|PubMed:1978757, ECO:0000269|PubMed:2259385, ECO:0000269|PubMed:7887414, ECO:0000269|PubMed:8825920, ECO:0000269|PubMed:9452042}. Note=The disease is caused by mutations affecting the gene represented in this entry.', 'DISEASE: Squamous cell carcinoma of the head and neck (HNSCC) [MIM:275355]: A non-melanoma skin cancer affecting the head and neck. The hallmark of cutaneous SCC is malignant transformation of normal epidermal keratinocytes. Note=The gene represented in this entry is involved in disease pathogenesis.', 'DISEASE: Lung cancer (LNCR) [MIM:211980]: A common malignancy affecting tissues of the lung. The most common form of lung cancer is non-small cell lung cancer (NSCLC) that can be divided into 3 major histologic subtypes: squamous cell carcinoma, adenocarcinoma, and large cell lung cancer. NSCLC is often diagnosed at an advanced stage and has a poor prognosis. Note=The disease is caused by mutations affecting the gene represented in this entry.', 'DISEASE: Papilloma of choroid plexus (CPP) [MIM:260500]: A benign tumor of neuroectodermal origin that generally occurs in childhood, but has also been reported in adults. Although generally found within the ventricular system, choroid plexus papillomas can arise ectopically in the brain parenchyma or disseminate throughout the neuraxis. Patients present with signs and symptoms of increased intracranial pressure including headache, hydrocephalus, papilledema, nausea, vomiting, cranial nerve deficits, gait impairment, and seizures. {ECO:0000269|PubMed:12085209}. Note=The disease is caused by mutations affecting the gene represented in this entry.', 'DISEASE: Adrenocortical carcinoma (ADCC) [MIM:202300]: A malignant neoplasm of the adrenal cortex and a rare childhood tumor. It occurs with increased frequency in patients with Beckwith- Wiedemann syndrome and Li-Fraumeni syndrome. {ECO:0000269|PubMed:11481490}. Note=The disease is caused by mutations affecting the gene represented in this entry.', 'DISEASE: Basal cell carcinoma 7 (BCC7) [MIM:614740]: A common malignant skin neoplasm that typically appears on hair-bearing skin, most commonly on sun-exposed areas. It is slow growing and rarely metastasizes, but has potentialities for local invasion and destruction. It usually develops as a flat, firm, pale area that is small, raised, pink or red, translucent, shiny, and waxy, and the area may bleed following minor injury. Tumor size can vary from a few millimeters to several centimeters in diameter. {ECO:0000269|PubMed:21946351}. Note=Disease susceptibility is associated with variations affecting the gene represented in this entry.', 'SIMILARITY: Belongs to the p53 family. {ECO:0000305}.', 'WEB RESOURCE: Name=IARC TP53 mutation database; Note=Somatic and germline TP53 mutations in human cancers; URL=\"http://p53.iarc.fr/\";', 'WEB RESOURCE: Name=Atlas of Genetics and Cytogenetics in Oncology and Haematology; URL=\"http://atlasgeneticsoncology.org/Genes/P53ID88.html\";', 'WEB RESOURCE: Name=NIEHS-SNPs; URL=\"http://egp.gs.washington.edu/data/tp53/\";', 'WEB RESOURCE: Name=SHMPD; Note=The Singapore human mutation and polymorphism database; URL=\"http://shmpd.bii.a-star.edu.sg/gene.php?genestart=A&genename=TP53\";', 'WEB RESOURCE: Name=Wikipedia; Note=P53 entry; URL=\"https://en.wikipedia.org/wiki/P53\";']\n",
      "['3D-structure', 'Acetylation', 'Activator', 'Alternative promoter usage', 'Alternative splicing', 'Apoptosis', 'Biological rhythms', 'Cell cycle', 'Complete proteome', 'Cytoplasm', 'Disease mutation', 'DNA-binding', 'Endoplasmic reticulum', 'Glycoprotein', 'Host-virus interaction', 'Isopeptide bond', 'Li-Fraumeni syndrome', 'Metal-binding', 'Methylation', 'Mitochondrion', 'Necrosis', 'Nucleus', 'Phosphoprotein', 'Polymorphism', 'Reference proteome', 'Repressor', 'Transcription', 'Transcription regulation', 'Tumor suppressor', 'Ubl conjugation', 'Zinc']\n"
     ]
    }
   ],
   "source": [
    "print(sp_rec.comments)\n",
    "print(sp_rec.keywords)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Help on Record in module Bio.SwissProt object:\n",
      "\n",
      "class Record(builtins.object)\n",
      " |  Holds information from a SwissProt record.\n",
      " |  \n",
      " |  Attributes:\n",
      " |   - entry_name        Name of this entry, e.g. RL1_ECOLI.\n",
      " |   - data_class        Either 'STANDARD' or 'PRELIMINARY'.\n",
      " |   - molecule_type     Type of molecule, 'PRT',\n",
      " |   - sequence_length   Number of residues.\n",
      " |   - accessions        List of the accession numbers, e.g. ['P00321']\n",
      " |   - created           A tuple of (date, release).\n",
      " |   - sequence_update   A tuple of (date, release).\n",
      " |   - annotation_update A tuple of (date, release).\n",
      " |   - description       Free-format description.\n",
      " |   - gene_name         Gene name.  See userman.txt for description.\n",
      " |   - organism          The source of the sequence.\n",
      " |   - organelle         The origin of the sequence.\n",
      " |   - organism_classification  The taxonomy classification.  List of strings.\n",
      " |     (http://www.ncbi.nlm.nih.gov/Taxonomy/)\n",
      " |   - taxonomy_id       A list of NCBI taxonomy id's.\n",
      " |   - host_organism     A list of names of the hosts of a virus, if any.\n",
      " |   - host_taxonomy_id  A list of NCBI taxonomy id's of the hosts, if any.\n",
      " |   - references        List of Reference objects.\n",
      " |   - comments          List of strings.\n",
      " |   - cross_references  List of tuples (db, id1[, id2][, id3]).  See the docs.\n",
      " |   - keywords          List of the keywords.\n",
      " |   - features          List of tuples (key name, from, to, description).\n",
      " |     from and to can be either integers for the residue\n",
      " |     numbers, '<', '>', or '?'\n",
      " |   - protein_existence Numerical value describing the evidence for the existence of the protein.\n",
      " |   - seqinfo           tuple of (length, molecular weight, CRC32 value)\n",
      " |   - sequence          The sequence.\n",
      " |  \n",
      " |  Examples\n",
      " |  --------\n",
      " |  >>> import Bio.SwissProt as sp\n",
      " |  >>> example_filename = \"SwissProt/sp008\"\n",
      " |  >>> with open(example_filename) as handle:\n",
      " |  ...     records = sp.parse(handle)\n",
      " |  ...     for record in records:\n",
      " |  ...         print(record.entry_name)\n",
      " |  ...         print(\",\".join(record.accessions))\n",
      " |  ...         print(record.keywords)\n",
      " |  ...         print(repr(record.organism))\n",
      " |  ...         print(record.sequence[:20] + \"...\")\n",
      " |  ...\n",
      " |  1A02_HUMAN\n",
      " |  P01892,P06338,P30514,P30444,P30445,P30446,Q29680,Q29899,Q95352,Q29837,Q95380\n",
      " |  ['MHC I', 'Transmembrane', 'Glycoprotein', 'Signal', 'Polymorphism', '3D-structure']\n",
      " |  'Homo sapiens (Human).'\n",
      " |  MAVMAPRTLVLLLSGALALT...\n",
      " |  \n",
      " |  Methods defined here:\n",
      " |  \n",
      " |  __init__(self)\n",
      " |      Initialize the class.\n",
      " |  \n",
      " |  ----------------------------------------------------------------------\n",
      " |  Data descriptors defined here:\n",
      " |  \n",
      " |  __dict__\n",
      " |      dictionary for instance variables (if defined)\n",
      " |  \n",
      " |  __weakref__\n",
      " |      list of weak references to the object (if defined)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "help(sp_rec)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1497\n",
      "('CHAIN', 1, 393, 'Cellular tumor antigen p53.', 'PRO_0000185703')\n",
      "('DNA_BIND', 102, 292, '', '')\n",
      "('REGION', 1, 320, 'Interaction with CCAR2. {ECO:0000269|PubMed:25732823}.', '')\n",
      "('MOTIF', 17, 25, 'TADI.', '')\n",
      "('METAL', 176, 176, 'Zinc.', '')\n",
      "('SITE', 120, 120, 'Interaction with DNA.', '')\n",
      "('MOD_RES', 9, 9, 'Phosphoserine; by HIPK4. {ECO:0000269|PubMed:18022393}.', '')\n",
      "('CROSSLNK', 291, 291, 'Glycyl lysine isopeptide (Lys-Gly) (interchain with G-Cter in ubiquitin). {ECO:0000269|PubMed:19536131}.', '')\n",
      "('VAR_SEQ', 1, 132, 'Missing (in isoform 7, isoform 8 and isoform 9). {ECO:0000303|PubMed:16131611}.', 'VSP_040833')\n",
      "('VARIANT', 5, 5, 'Q -> H (in a sporadic cancer; somatic mutation; abolishes strongly phosphorylation).', 'VAR_044543')\n",
      "('MUTAGEN', 15, 15, 'S->A: Loss of interaction with PPP2R5C, PPP2CA AND PPP2R1A. {ECO:0000269|PubMed:17967874}.', '')\n",
      "('HELIX', 3, 6, '{ECO:0000244|PDB:5HOU}.', '')\n",
      "('TURN', 8, 10, '{ECO:0000244|PDB:5HOU}.', '')\n",
      "('STRAND', 27, 29, '{ECO:0000244|PDB:2K8F}.', '')\n",
      "732\n",
      "dict_keys(['EMBL', 'CCDS', 'PIR', 'RefSeq', 'UniGene', 'PDB', 'PDBsum', 'DisProt', 'ProteinModelPortal', 'SMR', 'BioGrid', 'CORUM', 'DIP', 'ELM', 'IntAct', 'MINT', 'STRING', 'BindingDB', 'ChEMBL', 'DrugBank', 'MoonDB', 'TCDB', 'iPTMnet', 'PhosphoSitePlus', 'BioMuta', 'DMDM', 'SWISS-2DPAGE', 'EPD', 'MaxQB', 'PaxDb', 'PeptideAtlas', 'PRIDE', 'ProteomicsDB', 'DNASU', 'Ensembl', 'GeneID', 'KEGG', 'UCSC', 'CTD', 'DisGeNET', 'EuPathDB', 'GeneCards', 'GeneReviews', 'HGNC', 'HPA', 'MalaCards', 'MIM', 'neXtProt', 'OpenTargets', 'Orphanet', 'PharmGKB', 'eggNOG', 'GeneTree', 'HOVERGEN', 'InParanoid', 'KO', 'OMA', 'OrthoDB', 'PhylomeDB', 'TreeFam', 'Reactome', 'SABIO-RK', 'SignaLink', 'SIGNOR', 'ChiTaRS', 'EvolutionaryTrace', 'GeneWiki', 'GenomeRNAi', 'PMAP-CutDB', 'PRO', 'Proteomes', 'Bgee', 'ExpressionAtlas', 'Genevisible', 'GO', 'CDD', 'Gene3D', 'InterPro', 'PANTHER', 'Pfam', 'PRINTS', 'SUPFAM', 'PROSITE'])\n",
      "128\n",
      "('GO:0005737', 'C:cytoplasm', 'IDA:UniProtKB')\n",
      "('GO:0005524', 'F:ATP binding', 'IDA:UniProtKB')\n",
      "('GO:0006914', 'P:autophagy', 'IMP:CAFA')\n"
     ]
    }
   ],
   "source": [
    "done_features = set()\n",
    "print(len(sp_rec.features))\n",
    "for feature in sp_rec.features:\n",
    "    if feature[0] in done_features:\n",
    "        continue\n",
    "    else:\n",
    "        done_features.add(feature[0])\n",
    "        print(feature)\n",
    "print(len(sp_rec.cross_references))\n",
    "per_source = defaultdict(list)\n",
    "for xref in sp_rec.cross_references:\n",
    "    source = xref[0]\n",
    "    per_source[source].append(xref[1:])\n",
    "print(per_source.keys())\n",
    "done_GOs = set()\n",
    "print(len(per_source['GO']))\n",
    "for annot in per_source['GO']:\n",
    "    if annot[1][0] in done_GOs:\n",
    "        continue\n",
    "    else:\n",
    "        done_GOs.add(annot[1][0])\n",
    "        print(annot)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
